From f6f1cf611307d5ec032526f6d701cf244c6e9331 Mon Sep 17 00:00:00 2001
From: =?utf8?q?=C3=98yvind=20Kol=C3=A5s?=
Date: Sat, 19 Nov 2016 15:21:20 +0100
Subject: [PATCH] fast-float: improve memory access order

---
Review notes (text between the three-dash line and the diffstat is not part
of the commit message):

* Each converter now loads red, green, blue and alpha with four sequential
  `*fsrc++` reads at the top of the loop, so no branch may touch fsrc again.
* conv_rgbAF_linear_rgbAF_gamma, `alpha >= 1.0` branch: the leftover
  `*fdst++ = *fsrc++;` would store the next pixel's red as alpha, leave fsrc
  one float ahead for the rest of the run, and read past the source buffer
  on the last pixel.  It is replaced by `*fdst++ = alpha;`.  The diffstat
  below is updated accordingly (fast-float.c: 150 -> 152 changed lines).
* The post-image blob id on the fast-float.c "index" line predates that fix.
* Line breaks, blank context lines and leading indentation were
  reconstructed from a whitespace-collapsed copy of this patch; verify
  against the tree or apply with `git apply --ignore-whitespace`.

 babl/babl-fish-path.c   |   2 +-
 extensions/fast-float.c | 152 ++++++++++++++++++++--------------------
 2 files changed, 78 insertions(+), 76 deletions(-)

diff --git a/babl/babl-fish-path.c b/babl/babl-fish-path.c
index 51fbe69..cbe9618 100644
--- a/babl/babl-fish-path.c
+++ b/babl/babl-fish-path.c
@@ -31,7 +31,7 @@
 #endif
 
 #define NUM_TEST_PIXELS (babl_get_num_path_test_pixels ())
-#define MAX_BUFFER_SIZE 1024 /* XXX: reasonable size for this should be profiled */
+#define MAX_BUFFER_SIZE 2048 /* XXX: reasonable size for this should be profiled */
 
 int babl_in_fish_path = 0;
 
diff --git a/extensions/fast-float.c b/extensions/fast-float.c
index bafdcc0..7ee725a 100644
--- a/extensions/fast-float.c
+++ b/extensions/fast-float.c
@@ -298,13 +298,16 @@ conv_rgbaF_linear_rgbAF_gamma (unsigned char *src,
 
   while (n--)
     {
-      float alpha = fsrc[3];
+      float red = *fsrc++;
+      float green = *fsrc++;
+      float blue = *fsrc++;
+      float alpha = *fsrc++;
       if (alpha == 1.0)
         {
-          *fdst++ = linear_to_gamma_2_2_lut (*fsrc++);
-          *fdst++ = linear_to_gamma_2_2_lut (*fsrc++);
-          *fdst++ = linear_to_gamma_2_2_lut (*fsrc++);
-          *fdst++ = *fsrc++;
+          *fdst++ = linear_to_gamma_2_2_lut (red);
+          *fdst++ = linear_to_gamma_2_2_lut (green);
+          *fdst++ = linear_to_gamma_2_2_lut (blue);
+          *fdst++ = alpha;
         }
       else if (alpha == 0.0)
         {
@@ -312,15 +315,13 @@ conv_rgbaF_linear_rgbAF_gamma (unsigned char *src,
           *fdst++ = 0.0;
           *fdst++ = 0.0;
           *fdst++ = 0.0;
-          fsrc+=4;
         }
       else
         {
-          *fdst++ = linear_to_gamma_2_2_lut (*fsrc++) * alpha;
-          *fdst++ = linear_to_gamma_2_2_lut (*fsrc++) * alpha;
-          *fdst++ = linear_to_gamma_2_2_lut (*fsrc++) * alpha;
+          *fdst++ = linear_to_gamma_2_2_lut (red) * alpha;
+          *fdst++ = linear_to_gamma_2_2_lut (green) * alpha;
+          *fdst++ = linear_to_gamma_2_2_lut (blue) * alpha;
           *fdst++ = alpha;
-          fsrc++;
         }
     }
   return samples;
@@ -338,35 +339,35 @@ conv_rgbaF_linear_rgbA8_gamma (unsigned char *src,
 
   while (n--)
     {
-      float alpha = fsrc[3];
+      float red = *fsrc++;
+      float green = *fsrc++;
+      float blue = *fsrc++;
+      float alpha = *fsrc++;
       if (alpha >= 1.0)
         {
-          int val = linear_to_gamma_2_2_lut (fsrc[0]) * 0xff + 0.5f;
-          *cdst++ = val > 0xff ? 0xff : val < 0 ? 0 : val;
-          val = linear_to_gamma_2_2_lut (fsrc[1]) * 0xff + 0.5f;
-          *cdst++ = val > 0xff ? 0xff : val < 0 ? 0 : val;
-          val = linear_to_gamma_2_2_lut (fsrc[2]) * 0xff + 0.5f;
-          *cdst++ = val > 0xff ? 0xff : val < 0 ? 0 : val;
+          int val = linear_to_gamma_2_2_lut (red) * 0xff + 0.5f;
+          *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
+          val = linear_to_gamma_2_2_lut (green) * 0xff + 0.5f;
+          *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
+          val = linear_to_gamma_2_2_lut (blue) * 0xff + 0.5f;
+          *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
           *cdst++ = 0xff;
-          fsrc+=4;
         }
       else if (alpha <= 0.0)
         {
           *((uint32_t*)(cdst))=0;
           cdst+=4;
-          fsrc+=4;
         }
       else
         {
           float balpha = alpha * 0xff;
-          int val = linear_to_gamma_2_2_lut (fsrc[0]) * balpha + 0.5f;
-          *cdst++ = val > 0xff ? 0xff : val < 0 ? 0 : val;
-          val = linear_to_gamma_2_2_lut (fsrc[1]) * balpha + 0.5f;
-          *cdst++ = val > 0xff ? 0xff : val < 0 ? 0 : val;
-          val = linear_to_gamma_2_2_lut (fsrc[2]) * balpha + 0.5f;
-          *cdst++ = val > 0xff ? 0xff : val < 0 ? 0 : val;
+          int val = linear_to_gamma_2_2_lut (red) * balpha + 0.5f;
+          *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
+          val = linear_to_gamma_2_2_lut (green) * balpha + 0.5f;
+          *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
+          val = linear_to_gamma_2_2_lut (blue) * balpha + 0.5f;
+          *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
           *cdst++ = balpha + 0.5f;
-          fsrc+=4;
         }
     }
   return samples;
@@ -374,46 +375,46 @@ conv_rgbaF_linear_rgbA8_gamma (unsigned char *src,
 
 static long
 conv_rgbaF_linear_rgbA8_gamma_cairo (unsigned char *src,
-                               unsigned char *dst,
-                               long samples)
+                                     unsigned char *dst,
+                                     long samples)
 {
   float *fsrc = (float *) src;
-   unsigned char *cdst = (unsigned char *) dst;
-   int n = samples;
+  unsigned char *cdst = (unsigned char *) dst;
+  int n = samples;
 
-   while (n--)
-     {
-       float alpha = fsrc[3];
-       if (alpha >= 1.0)
-         {
-           int val = linear_to_gamma_2_2_lut (fsrc[2]) * 0xff + 0.5f;
-           *cdst++ = val > 0xff ? 0xff : val < 0 ? 0 : val;
-           val = linear_to_gamma_2_2_lut (fsrc[1]) * 0xff + 0.5f;
-           *cdst++ = val > 0xff ? 0xff : val < 0 ? 0 : val;
-           val = linear_to_gamma_2_2_lut (fsrc[0]) * 0xff + 0.5f;
-           *cdst++ = val > 0xff ? 0xff : val < 0 ? 0 : val;
-           *cdst++ = 0xff;
-           fsrc+=4;
-         }
-       else if (alpha <= 0.0)
-         {
-           *((uint32_t*)(cdst))=0;
-           cdst+=4;
-           fsrc+=4;
-         }
-       else
-         {
-           float balpha = alpha * 0xff;
-           int val = linear_to_gamma_2_2_lut (fsrc[2]) * balpha + 0.5f;
-           *cdst++ = val > 0xff ? 0xff : val < 0 ? 0 : val;
-           val = linear_to_gamma_2_2_lut (fsrc[1]) * balpha + 0.5f;
-           *cdst++ = val > 0xff ? 0xff : val < 0 ? 0 : val;
-           val = linear_to_gamma_2_2_lut (fsrc[0]) * balpha + 0.5f;
-           *cdst++ = val > 0xff ? 0xff : val < 0 ? 0 : val;
-           *cdst++ = balpha + 0.5f;
-           fsrc+=4;
-         }
-     }
+  while (n--)
+    {
+      float red = *fsrc++;
+      float green = *fsrc++;
+      float blue = *fsrc++;
+      float alpha = *fsrc++;
+      if (alpha >= 1.0)
+        {
+          int val = linear_to_gamma_2_2_lut (blue) * 0xff + 0.5f;
+          *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
+          val = linear_to_gamma_2_2_lut (green) * 0xff + 0.5f;
+          *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
+          val = linear_to_gamma_2_2_lut (red) * 0xff + 0.5f;
+          *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
+          *cdst++ = 0xff;
+        }
+      else if (alpha <= 0.0)
+        {
+          *((uint32_t*)(cdst))=0;
+          cdst+=4;
+        }
+      else
+        {
+          float balpha = alpha * 0xff;
+          int val = linear_to_gamma_2_2_lut (blue) * balpha + 0.5f;
+          *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
+          val = linear_to_gamma_2_2_lut (green) * balpha + 0.5f;
+          *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
+          val = linear_to_gamma_2_2_lut (red) * balpha + 0.5f;
+          *cdst++ = val >= 0xff ? 0xff : val <= 0 ? 0 : val;
+          *cdst++ = balpha + 0.5f;
+        }
+    }
   return samples;
 }
 
@@ -428,29 +429,31 @@ conv_rgbAF_linear_rgbAF_gamma (unsigned char *src,
 
   while (n--)
     {
-      float alpha = fsrc[3];
+      float red = *fsrc++;
+      float green = *fsrc++;
+      float blue = *fsrc++;
+      float alpha = *fsrc++;
       if (alpha < BABL_ALPHA_THRESHOLD)
         {
           *fdst++ = 0.0;
           *fdst++ = 0.0;
           *fdst++ = 0.0;
           *fdst++ = 0.0;
-          fsrc+=4;
         }
       else if (alpha >= 1.0)
         {
-          *fdst++ = linear_to_gamma_2_2_lut (*fsrc++);
-          *fdst++ = linear_to_gamma_2_2_lut (*fsrc++);
-          *fdst++ = linear_to_gamma_2_2_lut (*fsrc++);
-          *fdst++ = *fsrc++;
+          *fdst++ = linear_to_gamma_2_2_lut (red);
+          *fdst++ = linear_to_gamma_2_2_lut (green);
+          *fdst++ = linear_to_gamma_2_2_lut (blue);
+          *fdst++ = alpha;
         }
       else
         {
           float alpha_recip = 1.0 / alpha;
-          *fdst++ = linear_to_gamma_2_2_lut (*fsrc++ * alpha_recip) * alpha;
-          *fdst++ = linear_to_gamma_2_2_lut (*fsrc++ * alpha_recip) * alpha;
-          *fdst++ = linear_to_gamma_2_2_lut (*fsrc++ * alpha_recip) * alpha;
-          *fdst++ = *fsrc++;
+          *fdst++ = linear_to_gamma_2_2_lut (red * alpha_recip) * alpha;
+          *fdst++ = linear_to_gamma_2_2_lut (green * alpha_recip) * alpha;
+          *fdst++ = linear_to_gamma_2_2_lut (blue * alpha_recip) * alpha;
+          *fdst++ = alpha;
         }
     }
   return samples;
@@ -631,8 +634,7 @@ init (void)
     babl_conversion_new (rgbaF_linear, f32, "linear", conv_rgbaF_linear_rgbA8_gamma_cairo, NULL);
   }
 
-    o (rgbaF_linear, rgbA8_gamma);
-
+  o (rgbaF_linear, rgbA8_gamma);
   o (rgbAF_linear, rgbAF_gamma);
   o (rgbaF_linear, rgbAF_gamma);
   o (rgbaF_linear, rgbaF_gamma);
-- 
2.30.2